# -*- coding: utf-8 -*-
import scrapy,sys,os
# Python 2-only idiom: reload(sys) makes sys.setdefaultencoding available
# again so the process-wide implicit str/unicode codec can be set to UTF-8.
# NOTE(review): `reload` is not a builtin on Python 3 -- this line would
# raise NameError there; confirm the target interpreter before porting.
reload(sys).setdefaultencoding('UTF-8')
from njupt.items import ZmsItem
import logging
from pyquery import PyQuery as pq


class crcaSpider(scrapy.Spider):
    """Crawl the zanmeishi.com tab listing and emit one ZmsItem per entry.

    Listing pages are requested first (``start_requests``). Every entry link
    found on a listing page (``parse``) is followed to its detail page, where
    the image link is extracted and the finished item is emitted
    (``parse_content``).
    """

    name = "zms"
    allowed_domains = ["www.zanmeishi.com"]

    # Site root prepended to the hrefs found on listing pages.
    # NOTE(review): assumes those hrefs are site-relative paths -- confirm.
    base_url = "http://www.zanmeishi.com"
    # Listing page URL; ``%s`` is replaced with the page number.
    list_url_template = "http://www.zanmeishi.com/tab/0_0_0_0_0_0_0_0_%s.html"
    # Inclusive range of listing pages to crawl.
    first_page = 1
    last_page = 1305

    def start_requests(self):
        """Yield one request per listing page in ``first_page..last_page``."""
        for page in range(self.first_page, self.last_page + 1):
            yield scrapy.Request(self.list_url_template % page,
                                 callback=self.parse)

    def parse(self, response):
        """Extract entry links from a listing page.

        For every ``.tabslist li h3 a`` anchor, create a ZmsItem carrying the
        anchor text as ``title`` and yield a request for the detail page with
        the item attached under ``meta['item']``.
        """
        if response.status != 200:
            return

        for link in pq(response.body)('.tabslist li h3').find('a'):
            anchor = pq(link)
            href = anchor.attr('href')
            if not href:
                # An anchor without href cannot be followed; concatenating
                # None would raise TypeError and abort the rest of the page.
                continue

            item = ZmsItem()
            item['title'] = anchor.text()

            request = scrapy.Request(self.base_url + href,
                                     callback=self.parse_content)
            request.meta['item'] = item
            yield request

    def parse_content(self, response):
        """Complete the item started in ``parse`` with the image link.

        Reads the partially filled item from ``response.meta['item']``, stores
        the href of the first ``.img_tab a`` anchor in both ``url`` and
        ``image_urls``, logs the title and yields the item.
        """
        if response.status != 200:
            return

        # The item travels with the request; no new instance is needed here.
        item = response.meta['item']
        image_href = pq(response.body)('.img_tab a').attr('href')
        # NOTE(review): ``image_urls`` receives a single string (or None when
        # the selector matches nothing). Scrapy's stock ImagesPipeline expects
        # a list of URLs -- confirm what the downstream pipeline requires.
        item['image_urls'] = item['url'] = image_href

        logging.info("标题:%s", item['title'])
        yield item

